import pandas as pd
import glob, folium, branca, json
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter, Legend
from bokeh.layouts import column
# Stamp the notebook output with the time of this run.
run_timestamp = np.datetime64('now')
print(run_timestamp)
2021-02-16T13:19:21
# Gather every per-country install report into a single DataFrame, reading
# the CSV files in sorted filename order.
files = sorted(glob.glob('data/install*country.csv'))
if not files:
    # Fail early with a clear message rather than a KeyError from drop() below.
    raise FileNotFoundError("no report matches 'data/install*country.csv'")

# Read each report as UTF-16 and concatenate once: growing the frame inside
# the loop recopies all previous rows on every iteration.  ignore_index=True
# yields a fresh 0..n-1 index, so no leftover 'index' column has to be dropped.
ds = pd.concat(
    [pd.read_csv(f, encoding='utf-16') for f in files],
    ignore_index=True,
)

# Keep only the daily install/uninstall counters.
ds = ds.drop(columns=[
    'Daily Device Upgrades',
    'Total User Installs',
    'Active Device Installs',
    'Install events',
    'Update events',
    'Uninstall events',
])
ds['Date'] = pd.DatetimeIndex(ds['Date'])

# Running totals over all rows (every country), in concatenation order.
ds['Cumul Users'] = ds['Daily User Installs'].cumsum()
ds['Cumul Devices'] = ds['Daily Device Installs'].cumsum()
ds.tail()
| | Date | Package Name | Country | Daily Device Installs | Daily Device Uninstalls | Daily User Installs | Daily User Uninstalls | Cumul Users | Cumul Devices |
|---|---|---|---|---|---|---|---|---|---|
| 998 | 2021-02-13 | com.kb.android.argo | NO | 0 | 0 | 0 | 0 | 76 | 81 |
| 999 | 2021-02-13 | com.kb.android.argo | RU | 0 | 0 | 0 | 0 | 76 | 81 |
| 1000 | 2021-02-13 | com.kb.android.argo | SI | 0 | 0 | 0 | 0 | 76 | 81 |
| 1001 | 2021-02-13 | com.kb.android.argo | UA | 1 | 0 | 1 | 2 | 77 | 82 |
| 1002 | 2021-02-13 | com.kb.android.argo | US | 0 | 0 | 0 | 0 | 77 | 82 |
# For every known country add two columns with that country's running totals:
# '<code>-d' (device installs) and '<code>-u' (user installs).
country_cols = []
for country in np.unique(ds['Country'].dropna()):
    in_country = ds['Country'] == country
    for suffix, daily in (('-d', 'Daily Device Installs'),
                          ('-u', 'Daily User Installs')):
        # Only this country's rows receive a value; index alignment leaves
        # every other row NaN until the fill step below.
        ds[country + suffix] = ds.loc[in_country, daily].cumsum()
        country_cols.append(country + suffix)

if country_cols:
    # Carry each country's total forward onto the rows of other countries and
    # use 0 before its first appearance.  The fill is restricted to the new
    # columns: forward-filling the whole frame would also overwrite a missing
    # 'Country' (or any other gap) with the previous row's value.
    ds[country_cols] = ds[country_cols].ffill().fillna(0)
ds.tail()
| | Date | Package Name | Country | Daily Device Installs | Daily Device Uninstalls | Daily User Installs | Daily User Uninstalls | Cumul Users | Cumul Devices | AU-d | ... | RU-d | RU-u | SI-d | SI-u | SV-d | SV-u | UA-d | UA-u | US-d | US-u |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 998 | 2021-02-13 | com.kb.android.argo | NO | 0 | 0 | 0 | 0 | 76 | 81 | 3.0 | ... | 5.0 | 5.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 7.0 | 5.0 |
| 999 | 2021-02-13 | com.kb.android.argo | RU | 0 | 0 | 0 | 0 | 76 | 81 | 3.0 | ... | 5.0 | 5.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 7.0 | 5.0 |
| 1000 | 2021-02-13 | com.kb.android.argo | SI | 0 | 0 | 0 | 0 | 76 | 81 | 3.0 | ... | 5.0 | 5.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 7.0 | 5.0 |
| 1001 | 2021-02-13 | com.kb.android.argo | UA | 1 | 0 | 1 | 2 | 77 | 82 | 3.0 | ... | 5.0 | 5.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 7.0 | 5.0 |
| 1002 | 2021-02-13 | com.kb.android.argo | US | 0 | 0 | 0 | 0 | 77 | 82 | 3.0 | ... | 5.0 | 5.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 7.0 | 5.0 |
5 rows × 67 columns
# Render Bokeh figures inline in the notebook.
output_notebook()

# Distinct country codes; rows without a code are labelled 'undef'.
countries = np.unique(ds['Country'].fillna('undef'))
labels = ["%s" % c for c in countries]

# Country highlighted next to the global totals.
code = 'FR'

# Use width/height consistently: the legacy plot_height spelling was removed
# in Bokeh 3, while height is accepted wherever width already is.
plot_options = dict(
    width=1200,
    height=400,
    tools='pan,wheel_zoom,box_zoom',
    active_scroll="wheel_zoom",
    x_axis_type='datetime',
    toolbar_location="above",
)
p = figure(**plot_options)

# Global running totals.
p.line(x="Date", y='Cumul Users', source=ds, legend_label='Total Users',
       line_color='black', line_width=3)
p.line(x="Date", y='Cumul Devices', source=ds, legend_label='Total Devices',
       line_color='grey', line_width=3)

# Running totals of the selected country, drawn from that country's rows only.
p.line(x="Date", y=code+'-u', source=ds[ds['Country']==code],
       legend_label=code+' Users', line_color='#ff6f69', line_width=3)
p.line(x="Date", y=code+'-d', source=ds[ds['Country']==code],
       legend_label=code+' Devices', line_color='#88d8b0', line_width=3)

# Same full-date tick label at every zoom level.
# NOTE(review): newer Bokeh releases appear to expect plain strings rather
# than one-element lists here - confirm against the installed version.
p.xaxis.formatter=DatetimeTickFormatter(hours=["%d %B %Y"],days=["%d %B %Y"],months=["%d %B %Y"],years=["%d %B %Y"])
p.legend.location = 'top_left'
show(p)
# Total user installs per country.  Select the column before summing so the
# aggregation never touches the non-numeric columns (dates, package names).
df = pd.DataFrame(ds.groupby('Country')['Daily User Installs'].sum())

# Colour scale for the choropleth, spanning 0 to 20 installs.
color_scale = branca.colormap.linear.viridis.scale(0, 20)

# {'Daily User Installs': {country value: total}}
map_dict = df.to_dict()


def get_color(feature):
    """Return the fill colour for one GeoJSON country feature.

    The feature's ``properties['ISO_A2']`` value is looked up in the
    per-country install totals. Countries without an entry are drawn
    white; the others are coloured through ``color_scale``.
    """
    value = map_dict['Daily User Installs'].get(feature['properties']['ISO_A2'])
    if value is None:
        return 'white'  # no data for this country
    return color_scale(value)
def _country_style(feature):
    """Style one country polygon: fill from get_color, outline colour 'None'."""
    return {
        'fillColor': get_color(feature),
        'fillOpacity': 0.7,
        'color': 'None',
        'weight': 1,
    }


# World map centred on (0, 0) at zoom level 2.
m = folium.Map(location=[0, 0], tiles="cartodbpositron", zoom_start=2)

# Country polygons come from the local GeoJSON file, coloured per feature.
countries_layer = folium.GeoJson(data='countries.json', style_function=_country_style)
countries_layer.add_to(m)

# Attach the colour scale as the map legend, then display the map.
m.add_child(color_scale)
m